home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
C/C++ Users Group Library 1996 July
/
C-C++ Users Group Library July 1996.iso
/
vol_300
/
333_02
/
awktab.y
< prev
next >
Wrap
Text File
|
1989-02-18
|
30KB
|
1,293 lines
/***************************************************************************/
/* gawk -- GNU version of awk */
/* YACC input file to create the gAWK semantic parser */
/* */
/* Copyright (C) 1986 Free Software Foundation */
/* Written by Paul Rubin, August 1986 */
/* */
/***************************************************************************/
/* */
/* GAWK is distributed in the hope that it will be useful, but WITHOUT ANY */
/* WARRANTY. No author or distributor accepts responsibility to anyone */
/* for the consequences of using it or for whether it serves any */
/* particular purpose or works at all, unless he says so in writing. */
/* Refer to the GAWK General Public License for full details. */
/* */
/* Everyone is granted permission to copy, modify and redistribute GAWK, */
/* but only under the conditions described in the GAWK General Public */
/* License. A copy of this license is supposed to have been given to you */
/* along with GAWK so you can know your rights and responsibilities. It */
/* should be in a file named COPYING. Among other things, the copyright */
/* notice and this notice must be preserved on all copies. */
/* */
/* In other words, go ahead and share GAWK, but don't try to stop */
/* anyone else from sharing it farther. Help stamp out software hoarding! */
/* */
/***************************************************************************/
%{
/* C prologue: yacc copies this text into the generated parser source.     */
/* YYDEBUG is defined non-zero; presumably this compiles in the parser's   */
/* trace code -- confirm against the yacc implementation in use.           */
#define YYDEBUG 12
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include "awk.h"
/* Forward declarations; the definitions are not in this part of the file. */
STATIC int NEAR PASCAL yylex(void);
STATIC int NEAR PASCAL parse_escape(char **string_ptr);
/* The following variable is used for a very sickening thing. The awk */
/* language uses white space as the string concatenation operator, but */
/* having a white space token that would have to appear everywhere in all */
/* the grammar rules would be unbearable. It turns out we can return */
/* CONCAT_OP exactly when there really is one, just from knowing what */
/* kinds of other tokens it can appear between (namely, constants, */
/* variables, or close parentheses). This is because concatenation has */
/* the lowest priority of all operators. want_concat_token is used to */
/* remember that something that could be the left side of a concat has */
/* just been returned. If anyone knows a cleaner way to do this (don't */
/* look at the Un*x code to find one, though), please suggest it. */
/* NOTE(review): the precedence table in this file ranks CONCAT_OP above */
/* ASSIGNOP, ',', '?' ':', LEX_OR and LEX_AND, so "lowest priority" */
/* presumably means lowest among the relational and arithmetic */
/* operators -- confirm. */
static int want_concat_token;
/* More kludges; the plea above for a cleaner approach applies to these */
/* as well. */
/* want_regexp: the regexp rule increments it right after the opening '/' */
/* of a regular expression and resets it to 0 after the closing '/'. */
static int want_regexp = 0; /* lexical scanning kludge */
int lineno = 1; /* line number, kept for error messages (JF) */
/* During parsing of a gAWK program, the pointer to the next character */
/* is in this variable. */
char *lexptr;
/* Presumably the start of the text that lexptr walks through -- confirm */
/* against the lexer. */
char *lexptr_begin;
%}
/* Semantic value type shared by every token and non-terminal. */
%union
{
long lval; /* carried by ERROR, INCDEC, LEX_AND, LEX_OR, INCREMENT, DECREMENT */
AWKNUM fval; /* carried by NUMBER */
NODE *nodeval; /* parse-tree node; the value of the <nodeval> non-terminals */
int nodetypeval; /* node-type code; operator/keyword tokens, relop, whitespace */
char *sval; /* text carried by NAME, REGEXP and YSTRING */
NODE *(PASCAL *ptrval)(NODE *); /* function pointer carried by the LEX_*_FUNC, LEX_BUILTIN and LEX_GETLINE tokens */
}
/* Non-terminals whose value is a parse-tree node (NODE *). */
%type <nodeval> exp start program rule pattern conditional regexp
%type <nodeval> action variable redirect_in redirect_out exp_list builtin
%type <nodeval> statements statement if_statement opt_exp
/* Non-terminals whose value is a node-type code (int). */
%type <nodetypeval> whitespace relop
/* Tokens carrying text (char *). */
%token <sval> NAME REGEXP YSTRING
/* Tokens carrying a long. */
%token <lval> ERROR INCDEC
/* Numeric constant (AWKNUM). */
%token <fval> NUMBER
/* Operator and keyword tokens carrying a node-type code. */
%token <nodetypeval> ASSIGNOP MATCHOP NEWLINE CONCAT_OP
%token <nodetypeval> LEX_BEGIN LEX_END LEX_IF LEX_ELSE
%token <nodetypeval> LEX_WHILE LEX_FOR LEX_BREAK LEX_CONTINUE LEX_DELETE
%token <nodetypeval> LEX_PRINT LEX_PRINTF LEX_NEXT LEX_EXIT
%token <nodetypeval> RELOP_EQ RELOP_GEQ RELOP_LEQ RELOP_NEQ REDIR_APPEND
/* LEX_IN is declared without a value type. */
%token LEX_IN
%token <lval> LEX_AND LEX_OR INCREMENT DECREMENT
/* Tokens carrying a NODE *(*)(NODE *) function pointer. */
%token <ptrval> LEX_BUILTIN LEX_MATCH_FUNC LEX_SUB_FUNC LEX_SPLIT_FUNC
%token <ptrval> LEX_GETLINE
/* Precedence and associativity; each line binds tighter than the one */
/* before it (lowest to highest). */
%right ASSIGNOP
%left ','
%right '?' ':'
%left LEX_OR
%left LEX_AND
%left CONCAT_OP
%nonassoc MATCHOP '>' '<' RELOP_EQ RELOP_GEQ RELOP_LEQ RELOP_NEQ
%left '+' '-'
%left '*' '/' '%'
/* UNARY is a precedence-only name: the '!' conditional rule refers to it */
/* through %prec. */
%right UNARY
%right '^'
/* End of declarations; the grammar rules follow. */
%%
/* start: the whole program, after any leading newlines. */
start
    : optional_newlines program
        {
          /* hand the finished rule list over through expression_value */
          expression_value = $2;
        }
    ;
/* program: one or more rules, collected into a NODE_RULE_LIST chain. */
program
    : rule
        {
          /* the first rule becomes a one-element list */
          $$ = node($1, NODE_RULE_LIST, NULL);
        }
    | program rule
        {
          /* wrap the new rule in its own list cell and append it at the tail */
          $$ = append_right($1, node($2, NODE_RULE_LIST, NULL));
        }
    ;
/* rule: a pattern/action pair terminated by at least one NEWLINE.       */
/* Both the pattern and the action have an empty alternative that yields */
/* NULL, so either child of the resulting node may be NULL.              */
rule
    : pattern action NEWLINE optional_newlines
        {
          $$ = node($1, NODE_RULE_NODE, $2);
        }
    ;
/* pattern: what selects a rule -- nothing, BEGIN, END, a single */
/* conditional, or a pair of conditionals separated by a comma.  */
pattern
    : /* empty */
        { $$ = NULL; }
    | LEX_BEGIN
        { $$ = node(NULL, NODE_K_BEGIN, NULL); }
    | LEX_END
        { $$ = node(NULL, NODE_K_END, NULL); }
    | conditional
        { $$ = $1; }
    | conditional ',' conditional
        {
          /* pair the two conditionals, then pass the pair to mkrangenode() */
          $$ = mkrangenode(node($1, NODE_COND_PAIR, $3));
        }
    ;
/* conditional: boolean-valued constructs usable as a pattern or as a   */
/* loop test.  Each alternative yields a parse-tree node.               */
conditional
    : '!' conditional %prec UNARY
        { $$ = node($2, NODE_NOT, NULL); }

    /* "(exp_list) in NAME": the variable named by NAME goes on the left */
    /* of the node and the expression list on the right.  The CONCAT_OP  */
    /* in front of LEX_IN is presumably the token the lexer emits after  */
    /* a close parenthesis (see want_concat_token) -- confirm.           */
    | '(' exp_list ')' CONCAT_OP LEX_IN NAME
        { $$ = node(variable($6), NODE_MEMBER_COND, $2); }

    /* "exp in NAME": same node shape with a single expression */
    | exp CONCAT_OP LEX_IN NAME
        { $$ = node(variable($4), NODE_MEMBER_COND, $1); }

    | conditional LEX_AND conditional
        { $$ = node($1, NODE_AND, $3); }
    | conditional LEX_OR conditional
        { $$ = node($1, NODE_OR, $3); }

    /* parenthesised conditional: pass the inner node through and clear */
    /* want_concat_token                                                */
    | '(' conditional ')'
        {
          $$ = $2;
          want_concat_token = 0;
        }

    | regexp
        { $$ = $1; }

    /* for the match forms the node type is the value carried by MATCHOP */
    | exp MATCHOP regexp
        { $$ = node($1, $2, $3); }
    | exp MATCHOP variable
        { $$ = node($1, $2, $3); }

    /* comparison: the node type is the code produced by relop */
    | exp relop exp
        { $$ = node($1, $2, $3); }
    ;
/* action: the optional brace-enclosed statement list of a rule. */
action
    : /* empty */
        { $$ = NULL; }
    | '{' whitespace statements '}'
        {
          /* the action's value is the statement list itself */
          $$ = $3;
        }
    ;
/* statements: one or more statements chained as NODE_STATEMENT_LIST cells. */
statements
    : statement
        {
          /* the first statement becomes a one-element list */
          $$ = node($1, NODE_STATEMENT_LIST, NULL);
        }
    | statements statement
        {
          /* wrap the new statement in a list cell and append it at the tail */
          $$ = append_right($1, node($2, NODE_STATEMENT_LIST, NULL));
        }
    ;
/* statement_term: what ends a simple statement -- a NEWLINE or a ';',  */
/* each followed by optional newlines.  No %type is visible for this    */
/* non-terminal, so the value is set through the explicitly typed       */
/* $<nodetypeval>$ form; it is NODE_ILLEGAL in both cases.              */
statement_term
    : NEWLINE optional_newlines
        { $<nodetypeval>$ = NODE_ILLEGAL; }
    | ';' optional_newlines
        { $<nodetypeval>$ = NODE_ILLEGAL; }
    ;
/* regexp: a regular-expression constant written between two '/'.       */
/* The result is a NODE_REGEXP node whose right child is the value      */
/* returned by make_regexp() for the REGEXP token's text.               */
regexp
    : '/'
        {
          /* Mid-rule action, run as soon as the opening '/' is seen:   */
          /* raise want_regexp (the "lexical scanning kludge" flag),    */
          /* presumably so the lexer reads the following text as a      */
          /* REGEXP token -- confirm against yylex.                     */
          ++want_regexp;
        }
      REGEXP '/'
        {
          /* The mid-rule action above occupies position $2, so the     */
          /* REGEXP text is $3.                                         */
          want_regexp = 0;
          $$ = node(NULL, NODE_REGEXP, (NODE *) make_regexp($3));
        }
    ;   /* terminator added: this was the only rule without one */
/* relop: maps each comparison token to the node-type code that the */
/* "exp relop exp" conditional passes to node().                    */
relop
    : '>'        { $$ = NODE_GREATER; }
    | '<'        { $$ = NODE_LESS; }
    | RELOP_EQ   { $$ = NODE_EQUAL; }
    | RELOP_GEQ  { $$ = NODE_GEQ; }
    | RELOP_LEQ  { $$ = NODE_LEQ; }
    | RELOP_NEQ  { $$ = NODE_NOTEQUAL; }
    ;
/* whitespace: any run (possibly empty) of CONCAT_OP and NEWLINE tokens. */
/* Only the empty alternative has an explicit action; the others rely on */
/* yacc's default action, which gives the rule the value of its first    */
/* component.                                                            */
whitespace
    : /* blank */
        { $$ = NODE_ILLEGAL; }
    | CONCAT_OP
    | NEWLINE
    | whitespace CONCAT_OP
    | whitespace NEWLINE
    ;
statement :
'{' whitespace statements '}' whitespace
{
$$ = $3;
}
| if_statement
{
$$ = $1;
}
| LEX_WHILE '(' conditional ')' whitespace statement
{
$$ = node($3, NODE_K_WHILE, $6);
}
| LEX_FOR '(' opt_exp ';' conditional ';' opt_exp ')' whitespace statement
{
$$ = node($10, NODE_K_FOR, (NODE *) make_for_loop($3, $5, $7));
}
| LEX_FOR '(' opt_exp ';' ';' opt_exp ')' whitespace statement
{
$$ = node($9, NODE_K_FOR,
(NODE *) make_for_loop($3, NULL, $6));
}
| LEX_FOR '(' NAME CONCAT_OP LEX_IN NAME ')' whitespace statement
{
$$ = node($9, NODE_K_ARRAYFOR,
(NODE *) make_for_loop(variable($3),
NULL, variable($6)));
}
| LEX_BREAK statement_term
/* for break, maybe we'll have to remember where to break to */
{
$$ = node(NULL, NODE_K_BREAK, NULL);
}
| LEX_CONTINUE statement_term
/* similarly */
{
$$ = node(NULL, NODE_K_CONTINUE, NULL);
}
| LEX_PRINT exp_list redirect_out statement_term
{
$$ = node($2, NODE_K_PRINT, $3);
}
| LEX_PRINT '(' exp_list ')' /* BW: print(...) */
{